% Information: 
% downloaded data starts from 1980 Jan to 2020 Sept
% Each column is a country and each entry is Mb/d = thousands of Barrels
% per day..
%https://www.eia.gov/international/data/world/petroleum-and-other-liquids/monthly-petroleum-and-other-liquids-production?pd=5&p=0000000000000000000000000000000000vg&u=0&f=M&v=mapbubble&a=-&i=none&vo=value&&t=C&g=00000000000000000000000000000000000000000000000001&l=249-ruvvvvvfvtvnvv1vrvvvvfvvvvvvfvvvou20evvvvvvvvvvnvvvs0008&s=94694400000&e=1598918400000

% updated: August 1, 2022 (added the new PPI deflated price data)

clear
clc
opts = spreadsheetImportOptions("NumVariables", 40);

thisFile = mfilename('fullpath');
thisDir = fileparts(thisFile);
cd(fileparts(fileparts(thisDir)));

% Specify sheet and range
opts.Sheet = "SeriesExport-12-22-2020_12-27-0";
opts.DataRange = "A10:AN498";

% Specify column names and types
opts.VariableNames = ["Var1", "ANGOLA", "Var3", "UAE", "Var5", "CANADA", "Var7", "CHINA", "Var9", "ALGERIA", "Var11", "ECUADOR", "Var13", "EGYPT", "Var15", "UK", "Var17", "IRAN", "Var19", "IRAQ", "Var21", "KUWAIT", "Var23", "LIBYA", "Var25", "MEXICO", "Var27", "NIGERIA", "Var29", "NORWAY", "Var31", "QATAR", "Var33", "RUSSIA", "Var35", "SAUDIARABIA", "Var37", "USA", "Var39", "VENEZUELA"];
opts.SelectedVariableNames = ["ANGOLA", "UAE", "CANADA", "CHINA", "ALGERIA", "ECUADOR", "EGYPT", "UK", "IRAN", "IRAQ", "KUWAIT", "LIBYA", "MEXICO", "NIGERIA", "NORWAY", "QATAR", "RUSSIA", "SAUDIARABIA", "USA", "VENEZUELA"];
opts.VariableTypes =         ["char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double", "char", "double"];
opts = setvaropts(opts, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39], "WhitespaceRule", "preserve");
opts = setvaropts(opts, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 34, 35, 37, 39], "EmptyFieldRule", "auto");

% Import the Crude Oil Production data (Mb/d =thousand barrels per day)
CrudeOil = readtable("data/raw/CrudeOil.xls", opts, "UseExcel", false);

% focus on data from 1992 Jan (before that no Russia) to 2019 Dec (after that COVID) 
 
% first drop the last 9 rows 
CrudeOil = CrudeOil(1:480,:); 

% second, generate the date
Yr = reshape(repmat((1992:2019), 12, 1), [],1);
Mo = repmat(1:12, 1, numel(Yr)/12)';
Da = 1;
date = datetime(Yr,Mo,Da, 'Format','MMM-yyyy');

% drop the first 144 rows that go from 1980 Jan to 1991 Dec 
CrudeOil = CrudeOil(145:end,:); 

% Import Price data from 1990 jan to 2020 
Nominal = readtable("data/raw/Price_per_barrel.csv");

% discard everything in from 1990 Jan to 1991 Dec
Nominal = Nominal(25:end,:); % remove 1990 and 1991
Nominal = Nominal(1:336,:); % remove 2020


% Import (quarterly) Deflator (2019 Q4 =100) 
Deflator = readtable("data/raw/Deflator.xls");
Deflator_oil_drilling = readtable("data/raw/Deflator_PCU213111213111P.csv");

D = repmat(Deflator{:,2},1,3)';
D = D(:); 

D_oil_drilling = repmat(Deflator_oil_drilling{:,2},1,3)';
D_oil_drilling = D_oil_drilling(:); 

% Real Price of Crude Oil
Real_price = Nominal{:,2}./100.*D; 
Real_price_oil_drilling = Nominal{:,2}./100.*D_oil_drilling; 


% save
OilData.Production = CrudeOil;
OilData.Date = date; 
OilData.Price = Real_price; 
OilData.Price_oil_drilling_ppi_deflated = Real_price_oil_drilling;
save('data/processed/OilData','OilData');

